################################################
# Analysis for:
#
# Warner, Zach, Garrett N. Vande Kamp, and Soren Jordan.
# ``Conditional Relationships in Dynamic Models.'' 
#  Political Science Research & Methods
#
# Jennings and John Supplemental Analysis (Section 7.3)
#
# Required files: sm-jj-ajps.dta
#                 interp_urdf.R (for interpretation)
#
################################################

# Set your working directory here

library(dplyr)
library(ggplot2)
library(grid)
library(haven)
library(lmtest)
library(dynamac)
library(urca)
library(tseries)
library(polywog)
library(MASS)
library(plyr)
library(caret)
library(foreign)

completeFun <- function(data, desiredCols) {
  completeVec <- complete.cases(data[, desiredCols])
  return(data[completeVec, ])
}

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

r2.corr <- function(m) {
  lmfit <-  lm(model.response(model.frame(m)) ~ fitted(m))
  summary(lmfit)$r.squared
} 

# ur.df output is annoying. interp_urdf.R helper function for interpretation
#  Author: Hank Roark. 
#  https://gist.github.com/hankroark/968fc28b767f1e43b5a33b151b771bf9
source("interp_urdf.R")

##############################################################################################################################
# SM replication: Jennings and John
##############################################################################################################################
jj <- read.dta("sm-jj-ajps.dta")

# we only need a few of these variables
jj <- jj[,c("qs1freq","mip1qsx","party","misery")]

# create lags and differences like STATA does
jj$l_qs1freq <- c(NA,jj$qs1freq[-length(jj$qs1freq)])
jj$l_mip1qsx <- c(NA,jj$mip1qsx[-length(jj$mip1qsx)])
jj$l_party <- c(NA,jj$party[-length(jj$party)])
jj$l_misery <- c(NA,jj$misery[-length(jj$misery)])
jj$d_qs1freq <- jj$qs1freq - jj$l_qs1freq 
jj$d_mip1qsx <- jj$mip1qsx - jj$l_mip1qsx
jj$d_misery <- jj$misery - jj$l_misery

# create interactions -- ECM
jj$d_mip_d_misery <- jj$d_mip1qsx*jj$d_misery
jj$d_mip_l_misery <- jj$d_mip1qsx*jj$l_misery
jj$l_mip_d_misery <- jj$l_mip1qsx*jj$d_misery
jj$l_mip_l_misery <- jj$l_mip1qsx*jj$l_misery
# create interactions -- ADL
jj$mip_misery <- jj$mip1qsx*jj$misery
jj$mip_l_misery <- jj$mip1qsx*jj$l_misery
jj$l_mip_misery <- jj$l_mip1qsx*jj$misery
# variable for their interactions
jj$mipmisery <- jj$mip1qsx*jj$misery
jj$L.mipmisery <- c(NA,jj$mipmisery[-length(jj$mipmisery)])
jj$d.mipmisery <- jj$mipmisery - jj$L.mipmisery

# only the first lagged values are missing, so omit that row
jj.nona <- jj[-1,] 


######### Table SM. 5; ``J&J'' model #########
jjmod <- glm(d_qs1freq ~ l_qs1freq + d.mipmisery + L.mipmisery + party, data = jj.nona)
summary(jjmod)
BIC(jjmod)
r2.corr(jjmod)

######### Table SM. 3; ``General'' model #########
fullmod <- glm(d_qs1freq ~ l_qs1freq + d_mip1qsx + l_mip1qsx + d_misery + l_misery + d_mip_l_misery + 
                 l_mip_d_misery + d_mip_d_misery + l_mip_l_misery + party, data = jj.nona) 
summary(fullmod)
BIC(fullmod)
r2.corr(fullmod)

# out of sample prediction
tc <- trainControl("cv",5)
train(d_qs1freq ~ l_qs1freq + d.mipmisery + L.mipmisery + party, data = jj.nona, method="glm",trControl=tc)$results$RMSE
train(d_qs1freq ~ l_qs1freq + d_mip1qsx + l_mip1qsx + d_misery + l_misery + d_mip_l_misery + l_mip_d_misery + 
        d_mip_d_misery + l_mip_l_misery + party, data = jj.nona,
      method="glm",trControl=tc)$results$RMSE

##########################################################################
##    J & J's finding in this table is that the effect of MIP on QS     ##
##    is mediated by misery, but it operates independently of it. We    ##
##    can study this by holding change in misery at its mean and        ##
##    varying the level of MIP.                                         ##
##########################################################################

pred_effects_d_misery <- function(d_misery){
  thetas <- coef(fullmod)
  varcov <- vcov(fullmod)
  theta.tilde <- data.frame(mvrnorm(5000,thetas,varcov))
  theta.tilde <- theta.tilde[,c(2:10)] # all relevant variables. This order treats GDP as x and spending as z
  colnames(theta.tilde) <- c("gamma_1","theta_0","theta_1","theta_2","theta_3","theta_4","theta_5",
                             "theta_6","theta_7") # notation of Eq 4 in my paper
  # create relevant comparisons for human capital spending
  misery <- seq(unname(summary(jj.nona$misery)[1]), unname(summary(jj.nona$misery)[6]),length.out=250) # vary level
  # create hypothetical values based on these
  hyp_zt <- misery
  hyp_zt_l1 <- misery - d_misery
  hyp_zt_p1 <- misery + d_misery
  # create list to loop over
  hyplist <- as.list(NULL)
  # create storage for results
  effects <- data.frame(matrix(ncol=7,nrow=length(misery)))
  colnames(effects) <- c("spending","inst.lo","inst.est","inst.hi","total.lo","total.est","total.hi")
  for(i in 1:length(misery)){
    ### create temp storage
    hyplist[[i]] <- theta.tilde
    
    ### create hypotheticals
    # lagged level
    hyplist[[i]]$theta_2_hyp_zt_l1 <- hyplist[[i]]$theta_2*hyp_zt_l1[i]
    hyplist[[i]]$theta_3_hyp_zt_l1 <- hyplist[[i]]$theta_3*hyp_zt_l1[i]
    hyplist[[i]]$theta_4_hyp_zt_l1 <- hyplist[[i]]$theta_4*hyp_zt_l1[i]
    hyplist[[i]]$theta_5_hyp_zt_l1 <- hyplist[[i]]$theta_5*hyp_zt_l1[i]
    hyplist[[i]]$theta_6_hyp_zt_l1 <- hyplist[[i]]$theta_6*hyp_zt_l1[i]
    hyplist[[i]]$theta_7_hyp_zt_l1 <- hyplist[[i]]$theta_7*hyp_zt_l1[i]
    # current change
    hyplist[[i]]$theta_2_hyp_d <- hyplist[[i]]$theta_2*d_misery
    hyplist[[i]]$theta_3_hyp_d <- hyplist[[i]]$theta_3*d_misery
    hyplist[[i]]$theta_4_hyp_d <- hyplist[[i]]$theta_4*d_misery
    hyplist[[i]]$theta_5_hyp_d <- hyplist[[i]]$theta_5*d_misery
    hyplist[[i]]$theta_6_hyp_d <- hyplist[[i]]$theta_6*d_misery
    hyplist[[i]]$theta_7_hyp_d <- hyplist[[i]]$theta_7*d_misery
    # current level
    hyplist[[i]]$theta_2_hyp_zt <- hyplist[[i]]$theta_2*hyp_zt[i]
    hyplist[[i]]$theta_3_hyp_zt <- hyplist[[i]]$theta_3*hyp_zt[i]
    hyplist[[i]]$theta_4_hyp_zt <- hyplist[[i]]$theta_4*hyp_zt[i]
    hyplist[[i]]$theta_5_hyp_zt <- hyplist[[i]]$theta_5*hyp_zt[i]
    hyplist[[i]]$theta_6_hyp_zt <- hyplist[[i]]$theta_6*hyp_zt[i]
    hyplist[[i]]$theta_7_hyp_zt <- hyplist[[i]]$theta_7*hyp_zt[i]
    # future level
    hyplist[[i]]$theta_2_hyp_p1 <- hyplist[[i]]$theta_2*hyp_zt_p1[i]
    hyplist[[i]]$theta_3_hyp_p1 <- hyplist[[i]]$theta_3*hyp_zt_p1[i]
    hyplist[[i]]$theta_4_hyp_p1 <- hyplist[[i]]$theta_4*hyp_zt_p1[i]
    hyplist[[i]]$theta_5_hyp_p1 <- hyplist[[i]]$theta_5*hyp_zt_p1[i]
    hyplist[[i]]$theta_6_hyp_p1 <- hyplist[[i]]$theta_6*hyp_zt_p1[i]
    hyplist[[i]]$theta_7_hyp_p1 <- hyplist[[i]]$theta_7*hyp_zt_p1[i]
    # future change
    hyplist[[i]]$theta_2_hyp_dfut <- hyplist[[i]]$theta_2*d_misery
    hyplist[[i]]$theta_3_hyp_dfut <- hyplist[[i]]$theta_3*d_misery
    hyplist[[i]]$theta_4_hyp_dfut <- hyplist[[i]]$theta_4*d_misery
    hyplist[[i]]$theta_5_hyp_dfut <- hyplist[[i]]$theta_5*d_misery
    hyplist[[i]]$theta_6_hyp_dfut <- hyplist[[i]]$theta_6*d_misery
    hyplist[[i]]$theta_7_hyp_dfut <- hyplist[[i]]$theta_7*d_misery
    # negative future level
    hyplist[[i]]$theta_2_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_2*hyp_zt_p1[i])
    hyplist[[i]]$theta_3_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_3*hyp_zt_p1[i])
    hyplist[[i]]$theta_4_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_4*hyp_zt_p1[i])
    hyplist[[i]]$theta_5_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_5*hyp_zt_p1[i])
    hyplist[[i]]$theta_6_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_6*hyp_zt_p1[i])
    hyplist[[i]]$theta_7_hyp_p1_neg <- (-1)*(hyplist[[i]]$theta_7*hyp_zt_p1[i])
    # negative future change
    hyplist[[i]]$theta_2_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_2*d_misery)
    hyplist[[i]]$theta_3_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_3*d_misery)
    hyplist[[i]]$theta_4_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_4*d_misery)
    hyplist[[i]]$theta_5_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_5*d_misery)
    hyplist[[i]]$theta_6_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_6*d_misery)
    hyplist[[i]]$theta_7_hyp_dfut_neg <- (-1)*(hyplist[[i]]$theta_7*d_misery)
    
    ### calculate quantities of interest
    hyplist[[i]]$inst.effect <- rowSums(hyplist[[i]][,c("theta_0", "theta_4_hyp_zt","theta_5_hyp_d","theta_6_hyp_d")])
    hyplist[[i]]$total.effect <- (rowSums(hyplist[[i]][,c("theta_1", "theta_7_hyp_zt","theta_5_hyp_d","theta_6_hyp_d",
                                                          "theta_4_hyp_dfut_neg","theta_6_hyp_dfut_neg")]))/((-1)*(hyplist[[i]]$gamma_1))
    effects$spending[i] <- hyp_zt[i]
    effects$inst.lo[i] <- unname(quantile(hyplist[[i]]$inst.effect, .025))
    effects$inst.est[i] <- unname(summary(hyplist[[i]]$inst.effect)[4])
    effects$inst.hi[i] <- unname(quantile(hyplist[[i]]$inst.effect, .975))
    effects$total.lo[i] <- unname(quantile(hyplist[[i]]$total.effect, .025))
    effects$total.est[i] <- unname(summary(hyplist[[i]]$total.effect)[4])
    effects$total.hi[i] <- unname(quantile(hyplist[[i]]$total.effect, .975))
  }
  return(effects)
}

effects <- pred_effects_d_misery(d_misery =  mean(jj.nona$d_misery))

# plot
apfig11.1 <- ggplot(effects, aes(x=spending,y=inst.est)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.border = element_rect(fill=NA,size=1,linetype="solid")) +
  geom_ribbon(aes(ymin=inst.lo, ymax=inst.hi), alpha=.25) +
  geom_line(aes(x=spending,y=inst.est)) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  coord_cartesian(ylim=c(-27, 34)) +
  labs(title="Instantaneous effects",x="Misery",y=expression(paste(Delta, "QS"))) # +
  #theme(text=element_text(family="minpro"))

apfig11.2 <- ggplot(effects, aes(x=spending,y=total.est)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.border = element_rect(fill=NA,size=1,linetype="solid")) +
  geom_ribbon(aes(ymin=total.lo, ymax=total.hi), alpha=.25) +
  geom_line(aes(x=spending,y=total.est)) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  coord_cartesian(ylim=c(-27, 34)) +
  labs(title="Total effects",x="Misery",y=expression(paste(Delta, "QS, cumulative"))) # +
  # theme(text=element_text(family="minpro"))


######### Figure SM. 20 #########
pdf("sm-jj-qs.pdf",width=8,height=4)
#showtext.begin()
multiplot(apfig11.1, apfig11.2, cols=2)
#showtext.end()
dev.off()



##### DIAGNOSING TIME SERIES PROPERTIES #####

######### Table SM. 13 #########
# given the model, we need to study the properties of QS, MIP, misery, and the interaction
jj$interaction <- jj$mip1qsx*jj$misery
jj$l_interaction <- c(NA,jj$interaction[-length(jj$interaction)])
jj$d_interaction <- jj$interaction - jj$l_interaction

# only the first lagged values are missing, so omit that row
jj.nona <- jj[-1,]

# univariate tests
adf.test(jj.nona$qs1freq)$p.value # cannot reject null of nonstationarity
adf.test(jj.nona$mip1qsx)$p.value # cannot reject null of nonstationarity
adf.test(jj.nona$misery)$p.value # cannot reject null of nonstationarity
adf.test(jj.nona$interaction)$p.value # cannot reject null of nonstationarity
adf.test(jj.nona$d_qs1freq) # reject null of nonstationarity
adf.test(jj.nona$d_mip1qsx) # reject null of nonstationarity
adf.test(jj.nona$d_misery) # reject null of nonstationarity
adf.test(jj.nona$d_interaction) # reject null of nonstationarity
# so it looks like a unit root
Box.test(jj.nona$qs1freq)$p.value # cannot reject null of nonstationarity
Box.test(jj.nona$mip1qsx)$p.value # cannot reject null of nonstationarity
Box.test(jj.nona$misery)$p.value # cannot reject null of nonstationarity
Box.test(jj.nona$interaction)$p.value # cannot reject null of nonstationarity
Box.test(jj.nona$d_qs1freq) # reject null of nonstationarity
Box.test(jj.nona$d_mip1qsx) # reject null of nonstationarity
Box.test(jj.nona$d_misery) # reject null of nonstationarity
Box.test(jj.nona$d_interaction) # reject null of nonstationarity
# so it looks like a unit root
kpss.test(jj.nona$qs1freq)$p.value # cannot reject null of nonstationarity
kpss.test(jj.nona$mip1qsx)$p.value # cannot reject null of nonstationarity
kpss.test(jj.nona$misery)$p.value # cannot reject null of nonstationarity
kpss.test(jj.nona$interaction)$p.value # cannot reject null of nonstationarity
kpss.test(jj.nona$d_qs1freq) # reject null of nonstationarity
kpss.test(jj.nona$d_mip1qsx) # reject null of nonstationarity
kpss.test(jj.nona$d_misery) # reject null of nonstationarity
kpss.test(jj.nona$d_interaction) # reject null of nonstationarity



# This is just illustrative; none of the below is reported in the SM

vars <- c("qs1freq","mip1qsx","misery","interaction")
for(i in 1:length(vars)){
  # ACFs
  eval(parse(text=paste("tmp <- jj.nona$",vars[i],sep="")))
  eval(parse(text=paste("tmpdata <- Acf(tmp, lag.max=25, na.action = na.pass, main='",vars[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, acf=tmpdata$acf)
  tmpdata$min <- tmpdata$max <- rep(NA,nrow(tmpdata))
  tmpdata$max[1] <- 1/sqrt(length(tmp))
  for(j in 2:nrow(tmpdata)){
    tmpdata$max[j] <- (sqrt((1/length(tmp))*(1+(2*sum((tmpdata$max[c(1:tmpdata$lag[j])])^2)))))
  }
  tmpdata$max <- qnorm(.975)*tmpdata$max
  tmpdata$min <- -1*(tmpdata$max)
  filename <- paste("./diagnostic plots/jj-acf-",vars[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, acf)) + 
          theme_bw() + geom_ribbon(aes(ymin=min, ymax=max),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("ACF: ",vars[i],sep=""),x="Lag",y="Autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
  # PACFs
  eval(parse(text=paste("tmpdata <- Pacf(tmp, lag.max=25, na.action = na.pass, main='",vars[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, pcf=tmpdata$acf)
  filename <- paste("./diagnostic plots/jj-pacf-",vars[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, pcf)) + 
          theme_bw() + geom_ribbon(aes(ymin=(qnorm(.975)/sqrt(length(tmp))), ymax=(-qnorm(.975)/sqrt(length(tmp)))),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("PACF: ",vars[i],sep=""),x="Lag",y="Partial autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
}  # looks like all unit roots



######### Table SM. 14 #########
#... and now let's confirm that everything is cointegrated.
# looking at ECM residuals (i.e. 1-step procedure)
fullmod <- glm(d_qs1freq ~ l_qs1freq + d_mip1qsx + l_mip1qsx + d_misery + l_misery + d_mip_l_misery + 
                 l_mip_d_misery + d_mip_d_misery + l_mip_l_misery + party, data = jj.nona) 
Box.test(resid(fullmod),lag=20,type="Ljung-Box") # reject null -- possibly worrying
adf.test(resid(fullmod)) # reject null of non-stationarity, so evidence residuals are stationary (i.e., cointegration)
kpss.test(resid(fullmod)) # do not reject null of no unit root, so evidence residuals are stationary (i.e., cointegration)

# Engle-Granger 2-step test
m1 <- lm(qs1freq ~ mip1qsx + misery + interaction, data = jj.nona)
residuals <- resid(m1)
res.ADF<-ur.df(residuals,type="none",selectlags="AIC")
summary(res.ADF) # reject null, so stationary, and cointegration.



######### Table SM. 15 #########
# A formal Johansen test
dat <- data.frame(cbind(jj.nona$qs1freq, jj.nona$mip1qsx, jj.nona$misery, jj.nona$interaction))
ecmtest <- ca.jo(dat,type="trace",ecdet="const")
summary(ecmtest) # evidence of cointegrating vectors. 
summary(ca.jo(dat,type="trace",ecdet="trend")) # either way, still cointegrated

# None of these are perfect, but I think a unit root with cointegration is again a reasonable conclusion here.




